Setup
Parameters
print(params)
## $gene.metadata.path
## [1] "../../data/gene_metadata"
##
## $input.samples.rds.path
## [1] "../../results/01_Filter_Samples"
##
## $input.variability.rds.path
## [1] "../../results/02_Variability_Jackknife"
##
## $output.rds.path
## [1] "../../results/03_Variability_Plots"
##
## $species
## [1] "DRE"
##
## $species.name
## [1] "Danio rerio"
##
## $min.percentile
## [1] 0
##
## $max.percentile
## [1] 0.95
##
## $all.nonzero.matrix
## [1] TRUE
Functions
source("../functions/helper_functions.R")
source("../functions/variability_jackknife.R")
source("../functions/variability_plots.R")
Session Info
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 11.7.8
##
## Matrix products: default
## LAPACK: /Users/cbucao/miniforge3/envs/fish-ev-gene-function/lib/libopenblasp-r0.3.25.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices datasets utils methods base
##
## other attached packages:
## [1] cowplot_1.1.1 ggpubr_0.4.0 slider_0.2.2 viridis_0.6.3 viridisLite_0.4.1
## [6] gplots_3.1.3 ggplot2_3.3.6 matrixStats_0.62.0 edgeR_3.34.1 limma_3.48.3
## [11] colorBlindness_0.1.9 dplyr_1.0.9
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.6 tidyr_1.2.1 splines_4.1.0 jsonlite_1.8.9 carData_3.0-5
## [6] warp_0.2.0 gtools_3.9.4 bslib_0.5.0 BiocManager_1.30.21 highr_0.10
## [11] yulab.utils_0.1.8 renv_0.17.3 yaml_2.3.7 pillar_1.7.0 backports_1.4.1
## [16] lattice_0.21-8 glue_1.8.0 digest_0.6.37 ggsignif_0.6.4 colorspace_2.1-0
## [21] Matrix_1.5-4.1 htmltools_0.5.5 pkgconfig_2.0.3 broom_1.0.5 purrr_0.3.5
## [26] tidytree_0.4.6 scales_1.2.1 tibble_3.1.7 mgcv_1.8-42 generics_0.1.3
## [31] farver_2.1.1 car_3.1-2 ellipsis_0.3.2 cachem_1.0.8 withr_3.0.2
## [36] lazyeval_0.2.2 cli_3.6.3 magrittr_2.0.3 crayon_1.5.3 evaluate_1.0.1
## [41] fs_1.6.5 fansi_1.0.4 MASS_7.3-58.3 nlme_3.1-162 rstatix_0.7.2
## [46] tools_4.1.0 lifecycle_1.0.4 munsell_0.5.0 locfit_1.5-9.7 isoband_0.2.7
## [51] compiler_4.1.0 jquerylib_0.1.4 caTools_1.18.2 gridGraphics_0.5-1 rlang_1.1.4
## [56] grid_4.1.0 bitops_1.0-7 labeling_0.4.2 rmarkdown_2.22 gtable_0.3.3
## [61] abind_1.4-5 DBI_1.1.3 R6_2.5.1 gridExtra_2.3 knitr_1.43
## [66] fastmap_1.1.1 utf8_1.2.3 treeio_1.16.2 KernSmooth_2.23-21 ape_5.8
## [71] Rcpp_1.0.10 vctrs_0.4.1 tidyselect_1.2.0 xfun_0.39
Data
Processed data
# Load saved data from 01_Filter_Samples.Rmd
# Normalized by condition
if (!params$all.nonzero.matrix) {
filtered.samples <- readRDS(
file.path(params$input.samples.rds.path,
paste(params$species, "4_reps_data.conditions.rds", sep="_")))
} else {
filtered.samples <- readRDS(
file.path(params$input.samples.rds.path,
paste(params$species, "4_reps_data.conditions.nonzero.rds", sep="_")))
}
Tissue <- as.factor(filtered.samples$metadata$Tissue)
if ("Sex" %in% colnames(filtered.samples$metadata)) {
Sex <- as.factor(filtered.samples$metadata$Sex)
}
# Load jackknifed expression variability estimates from 02_Variability_Jackknife.Rmd
## Adjusted SD
if (!params$all.nonzero.matrix) {
jack.adj.sd <- readRDS(
file.path(params$input.variability.rds.path,
paste(params$species, "jack.adj.sd.rds", sep="_")))
## Residual SD
jack.resid.sd <- readRDS(
file.path(params$input.variability.rds.path,
paste(params$species, "jack.resid.sd.rds", sep="_")))
## Residual CV / Local (residual) CV
jack.resid.cv <- readRDS(
file.path(params$input.variability.rds.path,
paste(params$species, "jack.resid.cv.rds", sep="_")))
} else {
jack.adj.sd <- readRDS(
file.path(params$input.variability.rds.path,
paste(params$species, "jack.adj.sd.nonzero.rds", sep="_")))
## Residual SD
jack.resid.sd <- readRDS(
file.path(params$input.variability.rds.path,
paste(params$species, "jack.resid.sd.nonzero.rds", sep="_")))
## Residual CV / Local (residual) CV
jack.resid.cv <- readRDS(
file.path(params$input.variability.rds.path,
paste(params$species, "jack.resid.cv.nonzero.rds", sep="_")))
}
Main
Initial summary statistics
if ("Sex" %in% colnames(filtered.samples$metadata)) {
summary.stats <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
compute_gene_summary_stats(filtered.samples$log2.tmm.cpm.gf[[t]][[s]])
})
})
# Remove top and bottom x% of genes by expression level
summary.stats <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
summary.stats[[t]][[s]][
summary.stats[[t]][[s]]$Rank_Mean>params$min.percentile &
summary.stats[[t]][[s]]$Rank_Mean<params$max.percentile,]
})
})
# Remove missing conditions
summary.stats <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
summary.stats[[t]][[s]][complete.cases(summary.stats[[t]][[s]]),]
})
})
} else {
summary.stats <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
compute_gene_summary_stats(filtered.samples$log2.tmm.cpm.gf[[t]])
})
# Remove top and bottom x% of genes by expression level
summary.stats <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
summary.stats[[t]][
summary.stats[[t]]$Rank_Mean>params$min.percentile &
summary.stats[[t]]$Rank_Mean<params$max.percentile,]
})
}
Jackknife
Summarized results
# Adjusted SD
if ("Sex" %in% colnames(filtered.samples$metadata)) {
jack.adj.sd.summary <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
if (!is.null(jack.adj.sd[[t]][[s]])) {
jack.adj.sd[[t]][[s]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
}
})
})
} else {
jack.adj.sd.summary <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
jack.adj.sd[[t]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
})
}
# Residual SD
if ("Sex" %in% colnames(filtered.samples$metadata)) {
jack.resid.sd.summary <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
if (!is.null(jack.resid.sd[[t]][[s]])) {
jack.resid.sd[[t]][[s]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
}
})
})
} else {
jack.resid.sd.summary <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
jack.resid.sd[[t]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
})
}
# Residual CV
if ("Sex" %in% colnames(filtered.samples$metadata)) {
jack.resid.cv.summary <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
if (!is.null(jack.resid.cv[[t]][[s]])) {
jack.resid.cv[[t]][[s]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
}
})
})
} else {
jack.resid.cv.summary <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
jack.resid.cv[[t]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
})
}